#####################################################################
##Replication Code for ##############################################
##The Politics of Promotion in China's Foreign Policy Bureaucracy ###
##Tyler Jost and Yucong Li ##########################################
##The China Quarterly ###############################################
##(1) Dataframe Build ###############################################
#####################################################################

library(here)
library(sandwich)
library(lmtest)
library(dplyr)
library(tidyr)
library(reshape2)
library(ggplot2)
library(car)
library(interplot)
library(stargazer)
library(gridExtra)
library(pscl)
library(xtable)
library(stringr)
library(countrycode)

#######################################################
####Load Data##########################################
#######################################################

load("mfa_appointments.RData")

#######################################################
####Create Time-Series for Promotion to Vice Minister##
#######################################################

##Pool of appointments at the ambassador/assistant minister (部长助理) level or above.
##An appointment qualifies either through its cleaned title or through one of the rank flags.
senior_by_title <- d$clean_title == "Ambassador" | d$clean_title == "Assistant Minister" |
  d$clean_title == "Vice Minister" | d$clean_title == "ILD Vice Minister" |
  d$clean_title == "Foreign Minister" | d$clean_title == "State Councillor" |
  d$clean_title == "Director General"
senior_by_flag <- d$keyamb == 1 | d$vminsupra == 1 | d$minsupra == 1 | d$unamb == 1
senior <- d[senior_by_title | senior_by_flag, ]

##Appointments without both a start and an end year cannot be expanded into years
senior <- senior[!is.na(senior$start) & !is.na(senior$end), ]

##Expand every appointment into one row per year it spans (individual-year panel)
vmin <- senior %>%
  as_tibble() %>%
  mutate(year = Map(`:`, start, end)) %>%
  unnest(cols = "year")

##Keep only the columns used downstream, ordered by person and year
vmin <- vmin[, c("cname", "ename", "year", "clean_title", "keyamb", "vminsupra", "location",
                 "ildstart", "expertise", "expertise2", "expertise3", "expertise4",
                 "expertise5", "expertise6", "expertise7", "expertise8", "expertise9",
                 "expertise10")]
vmin <- vmin[order(vmin$cname, vmin$year), ]


##Rank indicators derived from the cleaned title (1 = holds the title in that person-year, 0 = otherwise)
vmin$vmin <- as.numeric(vmin$clean_title == "Vice Minister")       # Vice Minister
vmin$ildvm <- as.numeric(vmin$clean_title == "ILD Vice Minister")  # ILD Vice Minister
vmin$fm <- as.numeric(vmin$clean_title == "Foreign Minister")      # Foreign Minister
vmin$sc <- as.numeric(vmin$clean_title == "State Councillor")      # State Councillor

##Create the Promotion Variable
##  vm_promote = 1 for person-years spent in a vice-minister-level post (key ambassador,
##               vice-minister-equivalent post, Vice Minister or ILD Vice Minister), 0 otherwise
##  drop       = 1 for every person-year after the individual's first vice-minister-level year,
##               0 otherwise (these years are later removed from the promotion risk set)
##
##Both variables are computed per person in a single vectorized pass. The earlier row-by-row
##loop reset drop to 0 for any lower-ranked row it visited after the person's last
##vice-minister-level row, so individuals who later held an ordinary post re-entered the risk
##set; drop now depends only on the first vice-minister-level year, matching the coding used
##for promotion above vice minister further below.
at_vm_level <- vmin$keyamb == 1 | vmin$vminsupra == 1 | vmin$vmin == 1 | vmin$ildvm == 1
if (anyNA(at_vm_level)) {
  ##The row loop also failed on a missing rank flag; fail with an explicit message instead
  stop("Cannot code vm_promote: rank flags are missing for some person-years", call. = FALSE)
}

##First year in which each individual holds a vice-minister-level post (NA if never)
first_vm_year <- ave(
  ifelse(at_vm_level, vmin$year, NA),
  vmin$cname,
  FUN = function(years) if (all(is.na(years))) NA else min(years, na.rm = TRUE)
)

vmin$vm_promote <- as.numeric(at_vm_level)
vmin$drop <- as.numeric(!is.na(first_vm_year) & vmin$year > first_vm_year)

##Remove duplicate rows for years in which individual held two assignments
##Rows are sorted so that vm_promote == 1 comes first, then one row is kept per cname-year.
vmin <- vmin %>%
  group_by(cname, year) %>%
  arrange(desc(vm_promote)) %>%  # keep the row where vm_promote == 1
  slice(1) %>%                   
  ungroup()

##Sample-size checks printed to the console; the figures in the trailing comments are
##hard-coded from an earlier run and are not recomputed from the data.
##NOTE(review): the denominator below is 18+1350 while the line above reports 1350
##individuals; both round to 14% -- confirm which total is intended.
length(unique(vmin$cname)) #1350 total
length(unique(vmin$cname[vmin$vm_promote==1])) #189 promote
round((189/(18+1350)*100)) #about 14% of the sample promotes to vice minister level


#########################################################
####Add demographic data#################################
#########################################################

load("mfa_personnel.RData")

##Demographics: attach individual-level attributes to every person-year (left join on cname)
holder <- individual[, c("cname", "birth_year", "male", "abroad", "civilcollege", "military",
                         "long_march", "ild", "preprc_diplomat", "princeling", "arts_major",
                         "language_major", "science_major", "socialscience_major")]
vmin <- merge(vmin, holder, by = "cname", all.x = TRUE)

##Age in the given year
vmin$age <- vmin$year - vmin$birth_year

##Identify Beijing assignments
vmin$beijing <- as.numeric(vmin$location == "Beijing")

##Identify priority assignments: any of the ten expertise fields names a priority issue
priorityissues <- c("United States", "Soviet Union", "Russia", "France", "United Kingdom",
                    "United Nations", "Japan", "North Korea", "Taiwan", "North America")
covers_priority <- Reduce(
  `|`,
  lapply(
    c("expertise", paste0("expertise", 2:10)),
    function(field) vmin[[field]] %in% priorityissues
  )
)
vmin$priority <- as.numeric(covers_priority)

##Clean dataframe for the descriptive analysis, restricted to 1949-2023
descriptives <- vmin[, c("cname", "ename", "year", "vm_promote", "age", "male", "abroad",
                         "civilcollege", "military", "long_march", "ild", "arts_major",
                         "language_major", "science_major", "socialscience_major")]
descriptives$male <- as.numeric(descriptives$male)
in_study_window <- descriptives$year >= 1949 & descriptives$year <= 2023
descriptives <- descriptives[in_study_window, ]

##Save dataframe for the descriptive analysis
save(descriptives, file = "descriptives.RData")


##From here on, work with the promotion risk set only: person-years flagged drop == 1
##(years after the individual already reached vice minister level) are removed
vmin <- vmin[vmin$drop == 0, ]



#########################################################
####Create Junior Experience Variables###################
#########################################################

##Junior appointments: everything that is neither one of the senior titles, nor flagged as
##key ambassador / vice-minister-equivalent, nor of type "Senior Ministry"
is_senior_post <- d$clean_title == "Ambassador" | d$clean_title == "Assistant Minister" |
  d$keyamb == 1 | d$vminsupra == 1 |
  d$clean_title == "Vice Minister" | d$clean_title == "Foreign Minister" |
  d$clean_title == "State Councillor" | d$clean_title == "Director General" |
  d$type == "Senior Ministry"
junior <- d[!is_senior_post, ]

##Number of junior posts held in Beijing, per individual
juniorhomeposts <- junior %>%
  filter(location == "Beijing") %>%
  group_by(cname) %>%
  summarize(juniorhomeposts = n()) %>%
  as.data.frame()

##Number of junior posts held outside Beijing, per individual
juniorabroadposts <- junior %>%
  filter(location != "Beijing") %>%
  group_by(cname) %>%
  summarize(juniorabroadposts = n()) %>%
  as.data.frame()

##Number of junior posts touching a priority issue in any expertise field, per individual
junior_covers_priority <- Reduce(
  `|`,
  lapply(
    c("expertise", paste0("expertise", 2:10)),
    function(field) junior[[field]] %in% priorityissues
  )
)
juniorpriorityposts <- junior[junior_covers_priority, ] %>%
  group_by(cname) %>%
  summarize(juniorpriorityposts = n()) %>%
  as.data.frame()

##Putting Things Together: left-join the three tallies onto the panel
vmin <- Reduce(
  function(panel, tally) merge(panel, tally, by = "cname", all.x = TRUE),
  list(juniorhomeposts, juniorabroadposts, juniorpriorityposts),
  vmin
)

##Individuals absent from a tally held no such junior post on record: count them as zero
junior_count_columns <- c("juniorhomeposts", "juniorabroadposts", "juniorpriorityposts")
vmin[junior_count_columns] <- lapply(vmin[junior_count_columns], function(tally) {
  tally[is.na(tally)] <- 0
  tally
})


##Code above does not distinguish between individuals for whom we have no junior data and individuals who we can confirm had no junior experience
##We manually identified individuals for whom no information about their junior diplomatic experience is available
missingjuniorexperience <- c("丁伟", "严鹏", "乐俊清", "于武真", "仓友衡", "任景玉", "何金才", "傅元聪", "关恒广", "关登明", "关金地", 
                             "冀敬义", "冯志军", "刘伯鸣", "刘关仁", "刘志海", "刘文信", "刘昌业", "刘昕生", "刘正修", "刘永兴", "刘玉坤", 
                             "史永久", "叶弘良", "吴久洪", "吴传福", "吴祖荣", "吴虹滨", "吴连起", "周国斌", "周锦明", "夏守安", "夏树元", 
                             "孔明辉", "孙兆通", "孙和平", "孙国桐", "孙大栋", "孙延珩", "孙昆山", "孙治荣", "孟宪科", "宋国清", "宋增寿", 
                             "完永祥", "尹作金", "尹玉福", "居一杰", "崔广俊", "崔志伟", "左学良", "左树森", "左福荣", "廖启平", "张世华", 
                             "张佐", "张俊岐", "张克远", "张史贤", "张国强", "张宝生", "张序江", "张延年", "张愉", "张成礼", "张栋", "张润", 
                             "张滨华", "张真", "张鹏翔", "张龙宝", "徐代杰", "徐建国2", "徐次农", "徐绍海", "徐英杰", "戴诗琪", "戴诗祺", 
                             "智昭林", "曲阜君", "朱培庆", "朱曼黎", "李书元", "李国学", "李国新", "李宝城", "李尚胜", "李成仁", "李树立", 
                             "李正君", "李永禄", "李永谦", "李清玉", "李留根", "李祖沛", "李连甫", "李钦平", "李长和", "杨世祥", "杨克容", 
                             "杨友勇", "杨增业", "杨斌伟", "杨智宽", "杨鹤熊", "林廷海", "林真", "林贞龙", "柯小刚", "柳白", "梁健明", "梁栋", 
                             "樊桂金", "段春来", "沈庆", "沈江宽", "温西贵", "潘祥康", "焦东村", "王作峰", "王俊岭", "王其良", "王卫国", "王四法", 
                             "王家骥", "王富元", "王小庄", "王少华", "王廉", "王延义", "王建兴", "王建立", "王新元", "王景琪", "王森", "王永成", 
                             "王永秋", "王玉林", "王荣华", "王行达", "王钊贤", "瞿文明", "石同宁", "程学源", "程振声", "程树平", "穆文", "章颂先", 
                             "符华强", "肖思晋", "肖明", "肖晗", "肖清华", "胡守勤", "胡立鹏", "艾平", "范振水", "蒋元德", "蒋正云", "薛金维", 
                             "袁祖德", "许军", "许士国", "许孟水", "许昌财", "谢志衡", "谢月娥", "谢邦定", "谭兴举", "谭声琤", "赵五一", "赵学昌", 
                             "赵家骅", "赵志远", "赵惠民", "赵振宇", "赵春胜", "赵连义", "邓朝从", "邓绍勤", "邱胜云", "邵关福", "邵炯初", "邹肖力", 
                             "郑小龙", "郑清典", "郭业洲", "郭天民", "郭邦彦", "郭靖安", "金森", "钟汉九", "钱乃成", "钱锦昌", "陆伯源", "陆树林", 
                             "陈德来", "陈德福", "陈忠诚", "陈振友", "陈永成", "陶卫光", "陶苗发", "韩琍琍", "顾品锷", "顾懋萱", "马书学", "马志学", 
                             "马恩汉", "马维茂", "高善海", "高建", "高文献", "鲍树生", "鲍鄂生", "麦国彦", "黄家骙", "黄敏慧", "黄桂芳", "黄舍骄", 
                             "齐治家", "龚猎夫")

##Now, incorporate this into our measures: for the listed individuals the junior counts are
##unknown rather than zero, so they are set to NA.
##The panel identifies individuals by `cname`. The previous code indexed on `vmin$name`, a
##column that does not exist, so the comparison was empty and no value was ever set to NA.
junior_unknown <- vmin$cname %in% missingjuniorexperience
vmin$juniorhomeposts[junior_unknown] <- NA
vmin$juniorabroadposts[junior_unknown] <- NA
vmin$juniorpriorityposts[junior_unknown] <- NA


##Now, create a single variable that measures the share of junior posts held abroad
##(NaN when an individual has no junior posts at all, NA when the counts are unknown)
vmin$juniorshareabroad <- vmin$juniorabroadposts / (vmin$juniorabroadposts + vmin$juniorhomeposts)

##Create clean dataframe
##juniorshareabroad is kept in the selection: it was previously computed and then silently
##discarded by this subset. All previously retained columns are unchanged.
vmin <- subset(vmin, select = c("cname", "ename", "year", "vm_promote", "age", "male", "civilcollege",
                                "military", "long_march", "ildstart", "princeling", "location",
                                "expertise", "juniorhomeposts", "juniorabroadposts",
                                "juniorpriorityposts", "juniorshareabroad", "beijing", "priority"))


#########################################################
####Create Senior Experience Variables###################
#########################################################

##Total Senior Experience: running count of person-years for each individual.
##Rows are sorted by person and year first so that each person forms one contiguous run.
vmin <- vmin[!is.na(vmin$cname), ]
vmin <- vmin[order(vmin$cname, vmin$year), ]
vmin$seniorexperience <- sequence(rle(vmin$cname)$lengths)

##Running totals per individual: years in Beijing, years abroad, the resulting share of
##senior time spent abroad, and years in priority assignments
vmin <- vmin %>%
  group_by(cname) %>%
  mutate(
    beijingexperience = cumsum(location == "Beijing"),
    seniorabroadexperience = cumsum(location != "Beijing"),
    seniorshareabroad = seniorabroadexperience / (seniorabroadexperience + beijingexperience),
    priorityexperience = cumsum(priority == 1)
  ) %>%
  ungroup()


##Militarized Disputes
##Dyadic MID records with state A coded 710 (China, per the outcome key below), 1949 onward
mids <- read.csv("dyadic_mid_4.02.csv")
mids <- mids[mids$statea == 710 & mids$year >= 1949, ]
mids <- subset(mids, select = c("stateb", "endyear", "outcome"))

##Translate the opponent's COW code into a region and collapse regions into broader labels.
##The Latin America label previously read "Latin American & Caribbean", which never matches
##the region name returned by countrycode, so that recode was silently skipped.
mids$region <- countrycode(mids$stateb, "cown", "region")
mids$region[mids$region == "East Asia & Pacific"] <- "Asia"
mids$region[mids$region == "Latin America & Caribbean"] <- "Latin America"
mids$region[mids$region == "Sub-Saharan Africa"] <- "Africa"
mids$region[mids$region == "South Asia"] <- "Asia"
mids$region[mids$region == "Europe & Central Asia"] <- "Europe"
mids$stateb <- countrycode(mids$stateb, "cown", "country.name")
mids$mid <- 1

#Favorable MID Outcomes
#0 is Ongoing
#1 is Victory for State A (China)
#2 is Victory for State B
#3 is Yield by State A
#4 is Yield by State B
#5 is Stalemate
#6 is Compromise
#7 is Released for Seizures
#8 is Unclear (missing)
#9 is Missing

##Victory for China, yield by the opponent, or compromise counts as favorable
mids$favorablemidout <- ifelse(mids$outcome %in% c(1, 4, 6), 1, 0)

##Note that the dispute's end year is the column renamed to "year" here
colnames(mids) <- c("country", "year", "outcome", "region", "mid", "favorablemidout")
mids.region <- subset(mids, select = c("region", "year", "favorablemidout", "mid"))
mids.country <- subset(mids, select = c("country", "year", "favorablemidout", "mid"))

#Count No. of MIDs (and of favorable MIDs) per country-year
counts <- mids %>%
  group_by(year, country) %>%
  summarize(
    midcount = n(),
    midsuccesscount = sum(favorablemidout == 1, na.rm = TRUE)
  ) %>%
  ungroup()
counts <- as.data.frame(counts)

##Countries with at least one MID. This previously read counts$Var1 / counts$Freq, columns
##that do not exist in `counts`, so the result was always NULL.
midcountries <- unique(counts$country[counts$midcount > 0])

##Attach the MID counts to the person-year of the posting location; no match means no MID
vmin <- merge(vmin, counts, by.x = c("location", "year"), by.y = c("country", "year"), all.x = TRUE)
vmin$midcount[is.na(vmin$midcount)] <- 0
vmin$midsuccesscount[is.na(vmin$midsuccesscount)] <- 0

##Carry forward having been in an assignment covering a MID
##(rows are re-sorted by person and year so the running totals accumulate chronologically)
vmin <- vmin %>%
  group_by(cname) %>%
  arrange(cname, year) %>%
  mutate(
    totalmidcount = cumsum(midcount),
    totalmidsuccesscount = cumsum(midsuccesscount)
  ) %>%
  ungroup()


##Treaties
load("prc_treaties.RData")
t <- subset(t, select = c("year", "english"))
colnames(t) <- c("year", "country")
t$treaty <- 1

#Aggregate to get a count of all treaties in a given year (per country)
t.agg <- aggregate(treaty ~ year + country, data = t, FUN = sum, na.rm = TRUE)

#Merge onto the posting location and year; no match means no treaty that year
vmin <- merge(vmin, t.agg, by.x = c("location", "year"), by.y = c("country", "year"), all.x = TRUE, all.y = FALSE)
vmin$treaty[is.na(vmin$treaty)] <- 0

##Sum the number of treaties the individual has signed
##merge() returns rows sorted by location and year, so the panel must be put back into
##person-year order before accumulating; otherwise the running total mixes in treaties from
##later years whenever an individual served in more than one location.
vmin <- vmin %>%
  arrange(cname, year) %>%
  group_by(cname) %>%
  mutate(totaltreaty = cumsum(treaty)) %>%
  ungroup()

##Add ID for each name (for clustered standard errors)
vmin$id <- as.numeric(factor(vmin$cname))

##Three-year bins of senior experience
experience_breaks <- seq(-3, max(vmin$seniorexperience) + 3, by = 3)
vmin$count_bin <- cut(vmin$seniorexperience, breaks = experience_breaks, labels = FALSE)

##Add variable for each party congress
##Each congress label applies from its first listed year up to (not including) the next one;
##years before 1956, and missing years, fall under "Seventh"
congress_first_year <- c(1956, 1969, 1973, 1977, 1982, 1987, 1992,
                         1997, 2002, 2007, 2012, 2017, 2022)
congress_label <- c("Seventh", "Eighth", "Ninth", "Tenth", "Eleventh", "Twelfth", "Thirteenth",
                    "Fourteenth", "Fifteenth", "Sixteenth", "Seventeenth", "Eighteenth",
                    "Nineteenth", "Twentieth")
congress_index <- findInterval(vmin$year, congress_first_year) + 1
congress_index[is.na(congress_index)] <- 1
vmin$pc <- congress_label[congress_index]

#MID data is only through 2014
vmin <- vmin[vmin$year <= 2014, ]

##Save dataframe
save(vmin, file = "ts_vminister.RData")




#########################################################
####Create Time-Series for Promotion Above Vice Minister#
#########################################################

##Identify Senior-Most MFA Officials: vice-minister-level titles and above, or any of the
##key-ambassador / (vice-)minister-equivalent flags
top_by_title <- d$clean_title == "Vice Minister" | d$clean_title == "Foreign Minister" |
  d$clean_title == "State Councillor" | d$clean_title == "ILD Minister"
top_by_flag <- d$keyamb == 1 | d$minsupra == 1 | d$vminsupra == 1
senior <- d[top_by_title | top_by_flag, ]
senior <- senior[!is.na(senior$start) & !is.na(senior$end), ]

##Expand every appointment into one row per year it spans (individual-year panel)
minister <- senior %>%
  as_tibble() %>%
  mutate(year = Map(`:`, start, end)) %>%
  unnest(cols = "year")
minister <- minister[, c("cname", "ename", "year", "clean_title", "ildstart", "location",
                         "minsupra", "expertise", "expertise2", "expertise3", "expertise4",
                         "expertise5", "expertise6", "expertise7", "expertise8", "expertise9",
                         "expertise10")]
minister <- minister[order(minister$cname, minister$year), ]


##Rank indicators derived from the cleaned title (1 = holds the title, 0 = otherwise)
minister$fm <- as.numeric(minister$clean_title == "Foreign Minister")  # Foreign Minister
minister$sc <- as.numeric(minister$clean_title == "State Councillor")  # State Councillor

##Create the Promotion Variable
##  high_promote = 1 for person-years spent in a minister-level post (minister-equivalent
##                 post, Foreign Minister or State Councillor), 0 otherwise
##  drop         = 1 for every person-year after the individual's first minister-level year,
##                 0 otherwise
##
##Computed per person in one vectorized pass. This yields the same values as the earlier
##row-by-row loop, without rescanning the whole panel (and printing progress) for every row.
at_minister_level <- minister$minsupra == 1 | minister$fm == 1 | minister$sc == 1
if (anyNA(at_minister_level)) {
  ##The row loop also failed on a missing rank flag; fail with an explicit message instead
  stop("Cannot code high_promote: rank flags are missing for some person-years", call. = FALSE)
}

##First year in which each individual holds a minister-level post (NA if never)
first_minister_year <- ave(
  ifelse(at_minister_level, minister$year, NA),
  minister$cname,
  FUN = function(years) if (all(is.na(years))) NA else min(years, na.rm = TRUE)
)

minister$high_promote <- as.numeric(at_minister_level)
minister$drop <- as.numeric(!is.na(first_minister_year) & minister$year > first_minister_year)

#Keep only one row for each diplomat in a given year
#Rows are sorted so that high_promote == 1 comes first, then one row is kept per cname-year.
minister <- minister %>%
  group_by(cname, year) %>%
  arrange(desc(high_promote)) %>%  # keep the row where high_promote == 1
  slice(1) %>%                   
  ungroup()

#Sample-size checks printed to the console; the figures in the trailing comments are
#hard-coded from an earlier run and are not recomputed from the data.
length(unique(minister$cname)) #182 total
length(unique(minister$cname[minister$high_promote==1])) #25 promote
round((25/182)*100) #about 14% of the sample promotes above vice minister level

##Now, subset the sample to just individuals who have not yet promoted above vice minister level
##(drop == 1 marks the years after the first year coded high_promote == 1)
minister <- minister[minister$drop==0,]


#########################################################
####Merge Junior Experience Variables####################
#########################################################

##Left-join the per-individual junior post tallies onto the minister panel
minister <- merge(minister, juniorhomeposts, by = "cname", all.x = TRUE)
minister <- merge(minister, juniorabroadposts, by = "cname", all.x = TRUE)
minister <- merge(minister, juniorpriorityposts, by = "cname", all.x = TRUE)

##Individuals absent from a tally held no such junior post on record: count them as zero
minister$juniorhomeposts[is.na(minister$juniorhomeposts)] <- 0
minister$juniorabroadposts[is.na(minister$juniorabroadposts)] <- 0
minister$juniorpriorityposts[is.na(minister$juniorpriorityposts)] <- 0

##Fix missingness issue (as in vice minister level): for individuals with no available
##information on junior experience the counts are unknown, not zero.
##The panel identifies individuals by `cname`. The previous code indexed on `minister$name`,
##a column that does not exist, so the comparison was empty and no value was ever set to NA.
junior_unknown <- minister$cname %in% missingjuniorexperience
minister$juniorhomeposts[junior_unknown] <- NA
minister$juniorabroadposts[junior_unknown] <- NA
minister$juniorpriorityposts[junior_unknown] <- NA

#########################################################
####Merge Senior Experience Variables####################
#########################################################

##Snapshot of each individual's final person-year in the vice-minister panel; its running
##totals summarise the experience accumulated before reaching vice minister level
juniorsenior <- vmin %>%
  group_by(cname) %>%
  filter(year == max(year)) %>%
  ungroup()

##New column name = source column in the vice-minister panel
carried_columns <- c(
  cname = "cname",
  juniorsenior_shareaborad = "seniorshareabroad",
  juniorsenior_seniorexperience = "seniorexperience",
  juniorsenior_abroadexperience = "seniorabroadexperience",
  juniorsenior_totalmidcount = "totalmidcount",
  juniorsenior_totalmidsuccesscount = "totalmidsuccesscount",
  juniorsenior_totaltreaty = "totaltreaty"
)
juniorsenior <- setNames(juniorsenior[, unname(carried_columns)], names(carried_columns))

minister <- merge(minister, juniorsenior, by = "cname", all.x = TRUE)

#########################################################
####Create Senior Experience Variables###################
#########################################################

##Demographics: attach individual-level attributes to every person-year (left join on cname)
holder <- individual[, c("cname", "birth_year", "male", "civilcollege", "military",
                         "long_march", "socialscience_major", "princeling")]
minister <- merge(minister, holder, by = "cname", all.x = TRUE)
minister <- minister[order(minister$cname, minister$year), ]

##Age in the given year
minister$age <- minister$year - minister$birth_year

##Beijing assignment indicator
minister$beijing <- as.numeric(minister$location == "Beijing")

##Priority assignment indicator: any of the ten expertise fields names a priority issue
minister_covers_priority <- Reduce(
  `|`,
  lapply(
    c("expertise", paste0("expertise", 2:10)),
    function(field) minister[[field]] %in% priorityissues
  )
)
minister$priority <- as.numeric(minister_covers_priority)

##Drop rows that are exact duplicates across all columns
minister <- minister[!duplicated(minister), ]


##Total Senior Experience: running count of person-years for each individual (starting at 1).
##Rows are sorted by person and year first so that each person forms one contiguous run.
##An earlier zero-based version of this variable was computed and then immediately
##overwritten by this one; that dead assignment has been removed, so the values are unchanged.
minister <- minister[!is.na(minister$cname), ]
minister <- minister[order(minister$cname, minister$year), ]
minister$seniorexperience <- sequence(rle(minister$cname)$lengths)

##Total Senior Time in Beijing and Abroad (running totals per individual) and the
##resulting share of senior time spent abroad
minister <- minister %>%
  group_by(cname) %>%
  mutate(beijingexperience = cumsum(location == "Beijing"),
         seniorabroadexperience = cumsum(location != "Beijing"),
         seniorshareabroad = seniorabroadexperience / (seniorabroadexperience + beijingexperience)) %>%
  ungroup()

##Running total of years in priority assignments
minister <- minister %>%
  group_by(cname) %>%
  mutate(priorityexperience = cumsum(priority == 1)) %>%
  ungroup()


##Militarized Disputes
##Attach the MID counts of the posting location and year; no match means no MID
minister <- merge(minister, counts,
                  by.x = c("location", "year"), by.y = c("country", "year"),
                  all.x = TRUE)
minister$midcount <- replace(minister$midcount, is.na(minister$midcount), 0)
minister$midsuccesscount <- replace(minister$midsuccesscount, is.na(minister$midsuccesscount), 0)

##Carry forward having been in an assignment covering a MID
##(rows are put into person-year order so the running totals accumulate chronologically)
minister <- minister %>%
  arrange(cname, year) %>%
  group_by(cname) %>%
  mutate(
    totalmidcount = cumsum(midcount),
    totalmidsuccesscount = cumsum(midsuccesscount)
  ) %>%
  ungroup()


##Treaties
##Attach the treaty count of the posting location and year; no match means no treaty
minister <- merge(minister, t.agg, by.x = c("location", "year"), by.y = c("country", "year"), all.x = TRUE, all.y = FALSE)
minister$treaty[is.na(minister$treaty)] <- 0

##Treaties with major powers (treaty partner is one of the priority issues)
t <- t %>%
  mutate(priority_treaty = ifelse(country %in% priorityissues, 1, 0))

#Aggregate to a count per country-year
t_priority.agg <- aggregate(priority_treaty ~ year + country, data = t, FUN = sum, na.rm = TRUE)

#Merge
minister <- merge(minister, t_priority.agg, by.x = c("location", "year"), by.y = c("country", "year"), all.x = TRUE, all.y = FALSE)
minister$priority_treaty[is.na(minister$priority_treaty)] <- 0

##Sum the number of treaties the individual has signed
##merge() returns rows sorted by location and year, so the panel must be put back into
##person-year order before accumulating; otherwise the running totals mix in treaties from
##later years whenever an individual served in more than one location.
minister <- minister %>%
  arrange(cname, year) %>%
  group_by(cname) %>%
  mutate(totaltreaty = cumsum(treaty),
         totalprioritytreaty = cumsum(priority_treaty)) %>%
  ungroup()

##Add unique ID
minister$id <- as.numeric(factor(minister$cname))

##Party congress in session
##Each congress label applies from its first listed year up to (not including) the next one;
##years before 1956, and missing years, fall under "Seventh"
minister_congress_first_year <- c(1956, 1969, 1973, 1977, 1982, 1987, 1992,
                                  1997, 2002, 2007, 2012, 2017, 2022)
minister_congress_label <- c("Seventh", "Eighth", "Ninth", "Tenth", "Eleventh", "Twelfth",
                             "Thirteenth", "Fourteenth", "Fifteenth", "Sixteenth",
                             "Seventeenth", "Eighteenth", "Nineteenth", "Twentieth")
minister_congress_index <- findInterval(minister$year, minister_congress_first_year) + 1
minister_congress_index[is.na(minister_congress_index)] <- 1
minister$pc <- minister_congress_label[minister_congress_index]

##Five-year bins of senior experience
minister_experience_breaks <- seq(-5, max(minister$seniorexperience) + 5, by = 5)
minister$count_bin <- cut(minister$seniorexperience,
                          breaks = minister_experience_breaks,
                          labels = FALSE)

##Create measures that capture all Ambassador/VM experience
##(totals carried over from the vice-minister panel plus totals accumulated in this panel)
minister$alltreaties <- minister$juniorsenior_totaltreaty + minister$totaltreaty
minister$allmid <- minister$juniorsenior_totalmidcount + minister$totalmidcount
all_years_abroad <- minister$juniorsenior_abroadexperience + minister$seniorabroadexperience
all_years_senior <- minister$juniorsenior_seniorexperience + minister$seniorexperience
minister$allseniorshareabroad <- all_years_abroad / all_years_senior

##Now, subset to appointment years (years in which at least one high_promote == 1 occurs)
sampleyears <- unique(minister$year[minister$high_promote == 1])
minister <- minister[minister$year %in% sampleyears, ]

#MID data is only through 2014
minister <- minister[minister$year <= 2014, ]

##Save the dataframe
save(minister, file = "ts_minister.RData")


#########################################################
####Network Connections##################################
#########################################################

##Build an individual-year panel of ALL appointments (any rank) with known start and end,
##then record, for every appointment-year, who else was posted alongside the individual.
a <- d[is.na(d$start)==F & is.na(d$end)==F,]
a <- a %>% 
  group_by(rn = row_number())  %>% 
  mutate(year = list(start:end))  %>% 
  unnest(cols="year") %>% 
  ungroup()
a <- subset(a, select=c("cname", "ename", "year", "clean_title", "location", "expertise", "expertise2", "expertise3", 
                   "expertise4", "expertise5", "expertise6", "expertise7", "expertise8", "expertise9", "expertise10"))
a$connectionsabroad <- NA
a$connectionshome <- NA

##For each row:
##  - posting outside Beijing: connections are the other individuals at the same location in
##    the same year (stored in connectionsabroad)
##  - posting in Beijing: connections are the other individuals in Beijing in the same year
##    with the same first expertise field (stored in connectionshome)
##  - missing location: both fields stay NA
##Names are stored as a single ", "-separated string.
##NOTE(review): the comparisons on a$cname and a$expertise can evaluate to NA, in which case
##the subset contains all-NA rows and the literal text "NA" is pasted into the string -- this
##appears to be what the later "NA," clean-up is meant to remove; confirm.
for(i in 1:nrow(a)){
  year <- a$year[i]
  name <- a$cname[i]
  location <- a$location[i]
  expertise <- a$expertise[i]
  if(location!="Beijing" & is.na(location)==F){
    index <- a[a$location==location & a$year==year & is.na(a$location==location)==F & a$cname!=name,]
    if(length(index$cname)>0){
      a$connectionsabroad[i] <- paste(unique(index$cname), collapse = ", ")
    }
  } else if(is.na(location)==F){
    index <- a[a$location=="Beijing" & a$expertise==expertise & a$year==year & is.na(a$location==location)==F & a$cname!=name,]
    if(length(index$cname)>0){
      a$connectionshome[i] <- paste(unique(index$cname), collapse = ", ")
    }
  }
  print(i/nrow(a))  # progress as a fraction of rows processed
}

# Collapse to one row per individual-year: the distinct non-missing connection strings of all
# appointments held that year are joined with ", "
a <- a %>%
  group_by(cname, year) %>%
  summarize(across(
    c(connectionshome, connectionsabroad),
    function(entries) paste(unique(na.omit(entries)), collapse = ", ")
  ))

# Individual-years without any connection get the placeholder "None"
a$connectionshome <- replace(a$connectionshome, a$connectionshome == "", "None")
a$connectionsabroad <- replace(a$connectionsabroad, a$connectionsabroad == "", "None")

#Now, aggregate all connections built over time

# Running concatenation of connection strings.
# For position k the result is every non-missing entry from the first through the k-th,
# joined with ", "; positions whose own entry is missing yield the empty string.
accumulate_connections <- function(connections) {
  vapply(
    seq_along(connections),
    function(k) {
      if (is.na(connections[k])) {
        return("")
      }
      paste(na.omit(connections[seq_len(k)]), collapse = ", ")
    },
    character(1)
  )
}

# Accumulate, within each individual, all connections recorded up to and including each row
a <- a %>%
  group_by(cname) %>%
  mutate(
    totalconnectionsabroad = accumulate_connections(connectionsabroad),
    totalconnectionshome = accumulate_connections(connectionshome)
  )

#Now, remove duplicate connections
# Each accumulated string is split back into individual names; repeated names and the
# placeholder tokens "None" and "NA" (and empty tokens) are dropped; the remaining names are
# re-joined with ", ".
# Filtering on whole tokens also removes a placeholder that is the last or the only entry.
# The earlier pattern-based clean-up only stripped "NA" when it was followed by a comma, so a
# trailing or sole "NA" survived and was later counted as a connection.
dedupe_connections <- function(accumulated) {
  vapply(
    strsplit(accumulated, ", ", fixed = TRUE),
    function(tokens) {
      tokens <- unique(tokens)
      keep <- !is.na(tokens) & !(tokens %in% c("None", "NA", ""))
      paste(tokens[keep], collapse = ", ")
    },
    character(1),
    USE.NAMES = FALSE
  )
}

a$totalconnectionsabroad <- dedupe_connections(a$totalconnectionsabroad)
a$totalconnectionshome <- dedupe_connections(a$totalconnectionshome)

# Collapse every run of whitespace into a single space, then strip leading and trailing
# whitespace. Works element-wise on a character vector.
remove_extra_whitespace <- function(input_string) {
  trimws(gsub("\\s+", " ", input_string))
}

# Clean the whitespace of both accumulated-connection columns.
# remove_extra_whitespace() is already vectorized, so it is applied to the whole column
# directly; wrapping it in sapply() attached the strings as element names and would return a
# list for an empty column.
a$totalconnectionshome <- remove_extra_whitespace(a$totalconnectionshome)
a$totalconnectionsabroad <- remove_extra_whitespace(a$totalconnectionsabroad)

# Strip one or more commas from the end of each string. Works element-wise on a character
# vector; commas elsewhere in the string are left alone.
remove_trailing_commas <- function(input_string) {
  sub(",+$", "", input_string)
}

# Strip trailing commas from both accumulated-connection columns.
# remove_trailing_commas() is already vectorized, so it is applied to the whole column
# directly; wrapping it in sapply() attached the strings as element names and would return a
# list for an empty column.
a$totalconnectionshome <- remove_trailing_commas(a$totalconnectionshome)
a$totalconnectionsabroad <- remove_trailing_commas(a$totalconnectionsabroad)

##Counts: number of comma-separated names in each accumulated connection string
a$totalconnectionshome_count <- as.numeric(lengths(strsplit(a$totalconnectionshome, ",")))
a$totalconnectionsabroad_count <- as.numeric(lengths(strsplit(a$totalconnectionsabroad, ",")))

##Home and abroad connections combined
a$totalconnections_count <- a$totalconnectionshome_count + a$totalconnectionsabroad_count

#Keep only the keys and the three connection counts
a <- a[, c("cname", "year", "totalconnections_count",
           "totalconnectionsabroad_count", "totalconnectionshome_count")]

#Attach the counts to the vice-minister panel (left join on person-year) and save
vmin_connections <- merge(vmin, a, by = c("cname", "year"), all.x = TRUE)
save(vmin_connections, file = "ts_vminister_connections.RData")

#Attach the counts to the minister panel (left join on person-year) and save
minister_connections <- merge(minister, a, by = c("cname", "year"), all.x = TRUE)
save(minister_connections, file = "ts_minister_connections.RData")
